In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.preprocessing import Imputer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
In [2]:
df=pd.read_csv("vehicle.csv")
In [3]:
df.head()
Out[3]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
0 95 48.0 83.0 178.0 72.0 10 162.0 42.0 20.0 159 176.0 379.0 184.0 70.0 6.0 16.0 187.0 197 van
1 91 41.0 84.0 141.0 57.0 9 149.0 45.0 19.0 143 170.0 330.0 158.0 72.0 9.0 14.0 189.0 199 van
2 104 50.0 106.0 209.0 66.0 10 207.0 32.0 23.0 158 223.0 635.0 220.0 73.0 14.0 9.0 188.0 196 car
3 93 41.0 82.0 159.0 63.0 9 144.0 46.0 19.0 143 160.0 309.0 127.0 63.0 6.0 10.0 199.0 207 van
4 85 44.0 70.0 205.0 103.0 52 149.0 45.0 19.0 144 241.0 325.0 188.0 127.0 9.0 11.0 180.0 183 bus
In [4]:
df.shape
Out[4]:
(846, 19)
In [5]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 846 entries, 0 to 845
Data columns (total 19 columns):
compactness                    846 non-null int64
circularity                    841 non-null float64
distance_circularity           842 non-null float64
radius_ratio                   840 non-null float64
pr.axis_aspect_ratio           844 non-null float64
max.length_aspect_ratio        846 non-null int64
scatter_ratio                  845 non-null float64
elongatedness                  845 non-null float64
pr.axis_rectangularity         843 non-null float64
max.length_rectangularity      846 non-null int64
scaled_variance                843 non-null float64
scaled_variance.1              844 non-null float64
scaled_radius_of_gyration      844 non-null float64
scaled_radius_of_gyration.1    842 non-null float64
skewness_about                 840 non-null float64
skewness_about.1               845 non-null float64
skewness_about.2               845 non-null float64
hollows_ratio                  846 non-null int64
class                          846 non-null object
dtypes: float64(14), int64(4), object(1)
memory usage: 125.7+ KB
In [6]:
df.describe().transpose()
Out[6]:
count mean std min 25% 50% 75% max
compactness 846.0 93.678487 8.234474 73.0 87.00 93.0 100.0 119.0
circularity 841.0 44.828775 6.152172 33.0 40.00 44.0 49.0 59.0
distance_circularity 842.0 82.110451 15.778292 40.0 70.00 80.0 98.0 112.0
radius_ratio 840.0 168.888095 33.520198 104.0 141.00 167.0 195.0 333.0
pr.axis_aspect_ratio 844.0 61.678910 7.891463 47.0 57.00 61.0 65.0 138.0
max.length_aspect_ratio 846.0 8.567376 4.601217 2.0 7.00 8.0 10.0 55.0
scatter_ratio 845.0 168.901775 33.214848 112.0 147.00 157.0 198.0 265.0
elongatedness 845.0 40.933728 7.816186 26.0 33.00 43.0 46.0 61.0
pr.axis_rectangularity 843.0 20.582444 2.592933 17.0 19.00 20.0 23.0 29.0
max.length_rectangularity 846.0 147.998818 14.515652 118.0 137.00 146.0 159.0 188.0
scaled_variance 843.0 188.631079 31.411004 130.0 167.00 179.0 217.0 320.0
scaled_variance.1 844.0 439.494076 176.666903 184.0 318.00 363.5 587.0 1018.0
scaled_radius_of_gyration 844.0 174.709716 32.584808 109.0 149.00 173.5 198.0 268.0
scaled_radius_of_gyration.1 842.0 72.447743 7.486190 59.0 67.00 71.5 75.0 135.0
skewness_about 840.0 6.364286 4.920649 0.0 2.00 6.0 9.0 22.0
skewness_about.1 845.0 12.602367 8.936081 0.0 5.00 11.0 19.0 41.0
skewness_about.2 845.0 188.919527 6.155809 176.0 184.00 188.0 193.0 206.0
hollows_ratio 846.0 195.632388 7.438797 181.0 190.25 197.0 201.0 211.0
In [7]:
df_copy=df.copy()
df_copy.shape
Out[7]:
(846, 19)
In [8]:
# We can see that there are 'NaN' values in few columns.
df_copy.isna().sum()
Out[8]:
compactness                    0
circularity                    5
distance_circularity           4
radius_ratio                   6
pr.axis_aspect_ratio           2
max.length_aspect_ratio        0
scatter_ratio                  1
elongatedness                  1
pr.axis_rectangularity         3
max.length_rectangularity      0
scaled_variance                3
scaled_variance.1              2
scaled_radius_of_gyration      2
scaled_radius_of_gyration.1    4
skewness_about                 6
skewness_about.1               1
skewness_about.2               1
hollows_ratio                  0
class                          0
dtype: int64
In [9]:
# attribute that contains Zero values.
print((df_copy == 0).sum())
compactness                     0
circularity                     0
distance_circularity            0
radius_ratio                    0
pr.axis_aspect_ratio            0
max.length_aspect_ratio         0
scatter_ratio                   0
elongatedness                   0
pr.axis_rectangularity          0
max.length_rectangularity       0
scaled_variance                 0
scaled_variance.1               0
scaled_radius_of_gyration       0
scaled_radius_of_gyration.1     0
skewness_about                 77
skewness_about.1               30
skewness_about.2                0
hollows_ratio                   0
class                           0
dtype: int64
In [10]:
"""
<b>Now skewness_about,skewness_about.1 contains 77 and 30 Zero(0) values respectively.Earlier though was to replace zeros but
i think these are not errors.so will keep them as it.<b>
""" 
Out[10]:
'\n<b>Now skewness_about,skewness_about.1 contains 77 and 30 Zero(0) values respectively.Earlier though was to replace zeros but\ni think these are not errors.so will keep them as it.<b>\n'
In [11]:
print((df_copy==0).sum())
compactness                     0
circularity                     0
distance_circularity            0
radius_ratio                    0
pr.axis_aspect_ratio            0
max.length_aspect_ratio         0
scatter_ratio                   0
elongatedness                   0
pr.axis_rectangularity          0
max.length_rectangularity       0
scaled_variance                 0
scaled_variance.1               0
scaled_radius_of_gyration       0
scaled_radius_of_gyration.1     0
skewness_about                 77
skewness_about.1               30
skewness_about.2                0
hollows_ratio                   0
class                           0
dtype: int64
In [12]:
df_copy.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 846 entries, 0 to 845
Data columns (total 19 columns):
compactness                    846 non-null int64
circularity                    841 non-null float64
distance_circularity           842 non-null float64
radius_ratio                   840 non-null float64
pr.axis_aspect_ratio           844 non-null float64
max.length_aspect_ratio        846 non-null int64
scatter_ratio                  845 non-null float64
elongatedness                  845 non-null float64
pr.axis_rectangularity         843 non-null float64
max.length_rectangularity      846 non-null int64
scaled_variance                843 non-null float64
scaled_variance.1              844 non-null float64
scaled_radius_of_gyration      844 non-null float64
scaled_radius_of_gyration.1    842 non-null float64
skewness_about                 840 non-null float64
skewness_about.1               845 non-null float64
skewness_about.2               845 non-null float64
hollows_ratio                  846 non-null int64
class                          846 non-null object
dtypes: float64(14), int64(4), object(1)
memory usage: 125.7+ KB
In [13]:
# Now we will check total NaN values in all columns
df_copy.isna().sum()
Out[13]:
compactness                    0
circularity                    5
distance_circularity           4
radius_ratio                   6
pr.axis_aspect_ratio           2
max.length_aspect_ratio        0
scatter_ratio                  1
elongatedness                  1
pr.axis_rectangularity         3
max.length_rectangularity      0
scaled_variance                3
scaled_variance.1              2
scaled_radius_of_gyration      2
scaled_radius_of_gyration.1    4
skewness_about                 6
skewness_about.1               1
skewness_about.2               1
hollows_ratio                  0
class                          0
dtype: int64
In [14]:
"""
We can see we have columns with 'NaN' values.We will replace them with mean/median accordingly.
circularity                    5
distance_circularity           4
radius_ratio                   6
pr.axis_aspect_ratio           2
scatter_ratio                  1
elongatedness                  1
pr.axis_rectangularity         3
scaled_variance                3
scaled_variance.1              2
scaled_radius_of_gyration      2
scaled_radius_of_gyration.1    4
skewness_about                 6
skewness_about.1               1
skewness_about.2               1
""" 
"""
I can't plot distplot as columns contains NaN values.So i will remove NaN and 
then check the data is normally distributed or not
"""

# df_copy_circularity=df_copy['circularity'].dropna(axis=0)
# df_copy_distance_circularity= df_copy['distance_circularity'].dropna(axis=0)
# df_copy_radius_ratio= df_copy['radius_ratio'].dropna(axis=0)
# df_copy_praxis_aspect_ratio= df_copy['pr.axis_aspect_ratio'].dropna(axis=0)
# df_copy_scatter_ratio= df_copy['scatter_ratio'].dropna(axis=0)
# df_copy_elongatedness= df_copy['elongatedness'].dropna(axis=0)
# df_copy_praxis_rectangularity= df_copy['pr.axis_rectangularity'].dropna(axis=0)
# df_copy_scaled_variance= df_copy['scaled_variance'].dropna(axis=0)
# df_copy_scaled_variance1= df_copy['scaled_variance.1'].dropna(axis=0)
# df_copy_scaled_radius_of_gyration= df_copy['scaled_radius_of_gyration'].dropna(axis=0)
# df_copy_scaled_radius_of_gyration1= df_copy['scaled_radius_of_gyration.1'].dropna(axis=0)
# df_copy_skewness_about= df_copy['skewness_about'].dropna(axis=0)
# df_copy_skewness_about1= df_copy['skewness_about.1'].dropna(axis=0)
# df_copy_skewness_about2= df_copy['skewness_about.2'].dropna(axis=0)


# sns.distplot(df_copy_circularity)

# skewness_about=df_copy['skewness_about'].dropna()
# skewness_about.count()
Out[14]:
"\nI can't plot distplot as columns contains NaN values.So i will remove NaN and \nthen check the data is normally distributed or not\n"
In [15]:
# Class is categorical.will convert it to interger and then replace NaN.

df_copy['class']=df_copy['class'].astype('category').cat.codes
df_copy['class'].head()
Out[15]:
0    2
1    2
2    1
3    2
4    0
Name: class, dtype: int8
In [16]:
# Missing value changed by using Impurter or by using fillNa method.
# imputer=Imputer(missing_values='NaN',strategy='median',axis=0)

# transformed_X=imputer.fit_transform(df_copy)

# transformed_X.dtype
df_copy.isnull().sum()
Out[16]:
compactness                    0
circularity                    5
distance_circularity           4
radius_ratio                   6
pr.axis_aspect_ratio           2
max.length_aspect_ratio        0
scatter_ratio                  1
elongatedness                  1
pr.axis_rectangularity         3
max.length_rectangularity      0
scaled_variance                3
scaled_variance.1              2
scaled_radius_of_gyration      2
scaled_radius_of_gyration.1    4
skewness_about                 6
skewness_about.1               1
skewness_about.2               1
hollows_ratio                  0
class                          0
dtype: int64
In [17]:
# Missing value changed by using Impurter or by using fillNa method.
# df_copy_With_No_NA=df_copy.fillna(df_copy.median,inplace=True)
# df_copy_With_No_NA
df_copy_With_No_NAN_Values=df_copy.fillna(df_copy.median())
df_copy_With_No_NAN_Values.isnull().sum()
Out[17]:
compactness                    0
circularity                    0
distance_circularity           0
radius_ratio                   0
pr.axis_aspect_ratio           0
max.length_aspect_ratio        0
scatter_ratio                  0
elongatedness                  0
pr.axis_rectangularity         0
max.length_rectangularity      0
scaled_variance                0
scaled_variance.1              0
scaled_radius_of_gyration      0
scaled_radius_of_gyration.1    0
skewness_about                 0
skewness_about.1               0
skewness_about.2               0
hollows_ratio                  0
class                          0
dtype: int64

Missing values are handeled by replacing with Median in the datasets.

Finding Outliers and Removing them

In [18]:
df_copy_With_No_NAN_Values.boxplot(figsize=(100,10))
Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x25655d00400>
In [19]:
"""
We have outliers in radius_ratio,pr.axis_aspect_ratio,
max.length_aspect_ratio,scaled_variance,scaled_radius_of_gyration.1,skewness_about,skewness_about1
""" 
Out[19]:
'\nWe have outliers in radius_ratio,pr.axis_aspect_ratio,\nmax.length_aspect_ratio,scaled_variance,scaled_radius_of_gyration.1,skewness_about,skewness_about1\n'
In [20]:
Q1 = df_copy_With_No_NAN_Values.quantile(0.25)
Q3 =  df_copy_With_No_NAN_Values.quantile(0.75)
IQR = Q3 - Q1
print(IQR)
compactness                     13.00
circularity                      9.00
distance_circularity            28.00
radius_ratio                    54.00
pr.axis_aspect_ratio             8.00
max.length_aspect_ratio          3.00
scatter_ratio                   51.00
elongatedness                   13.00
pr.axis_rectangularity           4.00
max.length_rectangularity       22.00
scaled_variance                 50.00
scaled_variance.1              268.50
scaled_radius_of_gyration       49.00
scaled_radius_of_gyration.1      8.00
skewness_about                   7.00
skewness_about.1                14.00
skewness_about.2                 9.00
hollows_ratio                   10.75
class                            1.00
dtype: float64
In [21]:
df_copy_With_No_NAN_Values = df_copy_With_No_NAN_Values[~((df_copy_With_No_NAN_Values < (Q1 - 1.5 * IQR)) |(df_copy_With_No_NAN_Values > (Q3 + 1.5 * IQR))).any(axis=1)]
df_copy_With_No_NAN_Values.shape
Out[21]:
(813, 19)
In [22]:
"""
We can see in boxplot,outliers are removed
"""
df_copy_With_No_NAN_Values.boxplot(figsize=(100,10))
Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x256575882b0>

Now we will find correlation to find relation between Independent Var

In [23]:
df_copy_With_No_NAN_Values_Corr=df_copy_With_No_NAN_Values.corr(method='pearson')
df_copy_With_No_NAN_Values_Corr
Out[23]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
compactness 1.000000 0.679327 0.786824 0.745988 0.200634 0.497660 0.810842 -0.788560 0.811866 0.674166 0.791464 0.812494 0.577688 -0.253678 0.200640 0.161106 0.295414 0.364160 -0.028066
circularity 0.679327 1.000000 0.786778 0.640936 0.206156 0.564159 0.843765 -0.815764 0.840162 0.961220 0.802762 0.832483 0.925142 0.066697 0.140391 -0.014342 -0.116832 0.039182 -0.150490
distance_circularity 0.786824 0.786778 1.000000 0.808331 0.245454 0.668833 0.905577 -0.908687 0.894783 0.768689 0.884133 0.888193 0.702483 -0.237938 0.098813 0.263508 0.128912 0.321655 -0.058524
radius_ratio 0.745988 0.640936 0.808331 1.000000 0.666209 0.460691 0.789523 -0.844025 0.763829 0.576791 0.792802 0.776142 0.550545 -0.403176 0.040306 0.187190 0.418354 0.504693 -0.225750
pr.axis_aspect_ratio 0.200634 0.206156 0.245454 0.666209 1.000000 0.138261 0.199971 -0.301918 0.169752 0.145857 0.214810 0.186297 0.153509 -0.325102 -0.051539 -0.028583 0.406292 0.419801 -0.216582
max.length_aspect_ratio 0.497660 0.564159 0.668833 0.460691 0.138261 1.000000 0.497466 -0.502377 0.497233 0.647844 0.411562 0.457892 0.404288 -0.332752 0.084005 0.140972 0.056416 0.394449 0.371072
scatter_ratio 0.810842 0.843765 0.905577 0.789523 0.199971 0.497466 1.000000 -0.972340 0.989441 0.807070 0.977548 0.992982 0.794915 0.002444 0.063471 0.214182 -0.003100 0.116181 -0.283363
elongatedness -0.788560 -0.815764 -0.908687 -0.844025 -0.301918 -0.502377 -0.972340 1.000000 -0.949943 -0.770034 -0.964903 -0.955797 -0.762407 0.086977 -0.045458 -0.183955 -0.105264 -0.211086 0.337818
pr.axis_rectangularity 0.811866 0.840162 0.894783 0.763829 0.169752 0.497233 0.989441 -0.949943 1.000000 0.810349 0.963794 0.987773 0.792197 0.017769 0.072726 0.216626 -0.026470 0.097951 -0.251226
max.length_rectangularity 0.674166 0.961220 0.768689 0.576791 0.145857 0.647844 0.807070 -0.770034 0.810349 1.000000 0.749677 0.794071 0.866940 0.052585 0.134630 -0.002441 -0.117667 0.067177 -0.025047
scaled_variance 0.791464 0.802762 0.884133 0.792802 0.214810 0.411562 0.977548 -0.964903 0.963794 0.749677 1.000000 0.975549 0.781022 0.016841 0.033947 0.205718 0.022776 0.095925 -0.329114
scaled_variance.1 0.812494 0.832483 0.888193 0.776142 0.186297 0.457892 0.992982 -0.955797 0.987773 0.794071 0.975549 1.000000 0.789833 0.016215 0.064877 0.203587 0.000078 0.103204 -0.282360
scaled_radius_of_gyration 0.577688 0.925142 0.702483 0.550545 0.153509 0.404288 0.794915 -0.762407 0.792197 0.866940 0.781022 0.789833 1.000000 0.216385 0.168766 -0.058292 -0.232332 -0.120579 -0.241530
scaled_radius_of_gyration.1 -0.253678 0.066697 -0.237938 -0.403176 -0.325102 -0.332752 0.002444 0.086977 0.017769 0.052585 0.016841 0.016215 0.216385 1.000000 -0.059264 -0.130981 -0.842590 -0.917291 -0.283782
skewness_about 0.200640 0.140391 0.098813 0.040306 -0.051539 0.084005 0.063471 -0.045458 0.072726 0.134630 0.033947 0.064877 0.168766 -0.059264 1.000000 -0.045298 0.084793 0.061136 0.129438
skewness_about.1 0.161106 -0.014342 0.263508 0.187190 -0.028583 0.140972 0.214182 -0.183955 0.216626 -0.002441 0.205718 0.203587 -0.058292 -0.130981 -0.045298 1.000000 0.072801 0.199910 -0.013094
skewness_about.2 0.295414 -0.116832 0.128912 0.418354 0.406292 0.056416 -0.003100 -0.105264 -0.026470 -0.117667 0.022776 0.000078 -0.232332 -0.842590 0.084793 0.072801 1.000000 0.889946 0.061319
hollows_ratio 0.364160 0.039182 0.321655 0.504693 0.419801 0.394449 0.116181 -0.211086 0.097951 0.067177 0.095925 0.103204 -0.120579 -0.917291 0.061136 0.199910 0.889946 1.000000 0.231102
class -0.028066 -0.150490 -0.058524 -0.225750 -0.216582 0.371072 -0.283363 0.337818 -0.251226 -0.025047 -0.329114 -0.282360 -0.241530 -0.283782 0.129438 -0.013094 0.061319 0.231102 1.000000
In [24]:
# plot the heatmap
sns.heatmap(df_copy_With_No_NAN_Values_Corr, 
        xticklabels=df_copy_With_No_NAN_Values_Corr.columns,
        yticklabels=df_copy_With_No_NAN_Values_Corr.columns)
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x25656316278>
In [25]:
cmap = cmap=sns.diverging_palette(5, 250, as_cmap=True)

def magnify():
    return [dict(selector="th",
                 props=[("font-size", "7pt")]),
            dict(selector="td",
                 props=[('padding', "0em 0em")]),
            dict(selector="th:hover",
                 props=[("font-size", "12pt")]),
            dict(selector="tr:hover td:hover",
                 props=[('max-width', '200px'),
                        ('font-size', '12pt')])
]

df_copy_With_No_NAN_Values_Corr.style.background_gradient(cmap, axis=1)\
    .set_properties(**{'max-width': '80px', 'font-size': '10pt'})\
    .set_caption("Hover to magify")\
    .set_precision(2)\
    .set_table_styles(magnify())
Out[25]:
Hover to magify
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
compactness 1 0.68 0.79 0.75 0.2 0.5 0.81 -0.79 0.81 0.67 0.79 0.81 0.58 -0.25 0.2 0.16 0.3 0.36 -0.028
circularity 0.68 1 0.79 0.64 0.21 0.56 0.84 -0.82 0.84 0.96 0.8 0.83 0.93 0.067 0.14 -0.014 -0.12 0.039 -0.15
distance_circularity 0.79 0.79 1 0.81 0.25 0.67 0.91 -0.91 0.89 0.77 0.88 0.89 0.7 -0.24 0.099 0.26 0.13 0.32 -0.059
radius_ratio 0.75 0.64 0.81 1 0.67 0.46 0.79 -0.84 0.76 0.58 0.79 0.78 0.55 -0.4 0.04 0.19 0.42 0.5 -0.23
pr.axis_aspect_ratio 0.2 0.21 0.25 0.67 1 0.14 0.2 -0.3 0.17 0.15 0.21 0.19 0.15 -0.33 -0.052 -0.029 0.41 0.42 -0.22
max.length_aspect_ratio 0.5 0.56 0.67 0.46 0.14 1 0.5 -0.5 0.5 0.65 0.41 0.46 0.4 -0.33 0.084 0.14 0.056 0.39 0.37
scatter_ratio 0.81 0.84 0.91 0.79 0.2 0.5 1 -0.97 0.99 0.81 0.98 0.99 0.79 0.0024 0.063 0.21 -0.0031 0.12 -0.28
elongatedness -0.79 -0.82 -0.91 -0.84 -0.3 -0.5 -0.97 1 -0.95 -0.77 -0.96 -0.96 -0.76 0.087 -0.045 -0.18 -0.11 -0.21 0.34
pr.axis_rectangularity 0.81 0.84 0.89 0.76 0.17 0.5 0.99 -0.95 1 0.81 0.96 0.99 0.79 0.018 0.073 0.22 -0.026 0.098 -0.25
max.length_rectangularity 0.67 0.96 0.77 0.58 0.15 0.65 0.81 -0.77 0.81 1 0.75 0.79 0.87 0.053 0.13 -0.0024 -0.12 0.067 -0.025
scaled_variance 0.79 0.8 0.88 0.79 0.21 0.41 0.98 -0.96 0.96 0.75 1 0.98 0.78 0.017 0.034 0.21 0.023 0.096 -0.33
scaled_variance.1 0.81 0.83 0.89 0.78 0.19 0.46 0.99 -0.96 0.99 0.79 0.98 1 0.79 0.016 0.065 0.2 7.8e-05 0.1 -0.28
scaled_radius_of_gyration 0.58 0.93 0.7 0.55 0.15 0.4 0.79 -0.76 0.79 0.87 0.78 0.79 1 0.22 0.17 -0.058 -0.23 -0.12 -0.24
scaled_radius_of_gyration.1 -0.25 0.067 -0.24 -0.4 -0.33 -0.33 0.0024 0.087 0.018 0.053 0.017 0.016 0.22 1 -0.059 -0.13 -0.84 -0.92 -0.28
skewness_about 0.2 0.14 0.099 0.04 -0.052 0.084 0.063 -0.045 0.073 0.13 0.034 0.065 0.17 -0.059 1 -0.045 0.085 0.061 0.13
skewness_about.1 0.16 -0.014 0.26 0.19 -0.029 0.14 0.21 -0.18 0.22 -0.0024 0.21 0.2 -0.058 -0.13 -0.045 1 0.073 0.2 -0.013
skewness_about.2 0.3 -0.12 0.13 0.42 0.41 0.056 -0.0031 -0.11 -0.026 -0.12 0.023 7.8e-05 -0.23 -0.84 0.085 0.073 1 0.89 0.061
hollows_ratio 0.36 0.039 0.32 0.5 0.42 0.39 0.12 -0.21 0.098 0.067 0.096 0.1 -0.12 -0.92 0.061 0.2 0.89 1 0.23
class -0.028 -0.15 -0.059 -0.23 -0.22 0.37 -0.28 0.34 -0.25 -0.025 -0.33 -0.28 -0.24 -0.28 0.13 -0.013 0.061 0.23 1
In [26]:
# Find inferences
"""
Compactness shows high significance with circularity,distance_circularity,radius_ratio,
scatter_ratio,elongatedness in a negative way,pr.axis_rectangularity,max.length_rectangularity,
scaled_variance,scaled_variance.1



""" 
#Let us check for pair plots
sns.pairplot(df_copy_With_No_NAN_Values,diag_kind='kde')
Out[26]:
<seaborn.axisgrid.PairGrid at 0x1b277accc18>

3. Use PCA from scikit learn and elbow plot to find out reduced number of dimension (which covers more than 95% of the variance) - 10 points

Create a covariance matrix for identifying Principal components

In [26]:
# In this class is a dependent variable.So removing it for PCA.
X=df_copy_With_No_NAN_Values.drop(['class'],axis=1)
X.head()
y=df_copy_With_No_NAN_Values[['class']]
sns.pairplot(X,diag_kind='kde')
Out[26]:
<seaborn.axisgrid.PairGrid at 0x25657015668>
In [27]:
"""
We will scale the data with the help of zscore.It is used to remove the influence of one attribute over others in terms of 
units.
""" 
from scipy.stats import zscore
XScaled=X.apply(zscore)
XScaled.head()
Out[27]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
0 0.172252 0.524273 0.054413 0.320514 1.923745 0.917511 -0.210249 0.143541 -0.226542 0.763046 -0.393996 -0.345187 0.297468 -0.323897 -0.038246 0.379490 -0.324770 0.176759
1 -0.324348 -0.621958 0.118805 -0.852833 -0.743688 0.429013 -0.607056 0.532106 -0.618411 -0.342266 -0.592152 -0.626997 -0.505992 -0.000795 0.609548 0.155190 0.002617 0.449185
2 1.289602 0.851768 1.535438 1.303589 0.856772 0.917511 1.163315 -1.151676 0.949066 0.693964 1.158224 1.127127 1.409952 0.160756 1.689205 -0.405562 -0.161076 0.040546
3 -0.076048 -0.621958 -0.009980 -0.282015 0.323285 0.429013 -0.759675 0.661628 -0.618411 -0.342266 -0.922412 -0.747772 -1.463965 -1.454753 -0.038246 -0.293412 1.639554 1.538891
5 1.662052 -0.130716 1.535438 0.130242 -1.988490 -1.036481 2.628450 -1.928806 2.908414 1.453866 3.040704 2.979021 2.769655 2.099367 -0.254177 -0.405562 -1.306932 -1.730226
In [28]:
#  Create covariance matrix

cov_matrix = np.cov(XScaled,rowvar=False)
print('Covariance Matrix \n%s', cov_matrix)
Covariance Matrix 
%s [[ 1.00123153e+00  6.80164027e-01  7.87792814e-01  7.46906930e-01
   2.00881439e-01  4.98273207e-01  8.11840645e-01 -7.89531434e-01
   8.12866245e-01  6.74996601e-01  7.92438680e-01  8.13494150e-01
   5.78399755e-01 -2.53990635e-01  2.00887113e-01  1.61304844e-01
   2.95777412e-01  3.64608943e-01]
 [ 6.80164027e-01  1.00123153e+00  7.87747162e-01  6.41725205e-01
   2.06409699e-01  5.64854067e-01  8.44804611e-01 -8.16768295e-01
   8.41196310e-01  9.62404205e-01  8.03750964e-01  8.33508154e-01
   9.26281607e-01  6.67790806e-02  1.40563881e-01 -1.43598307e-02
  -1.16976151e-01  3.92302597e-02]
 [ 7.87792814e-01  7.87747162e-01  1.00123153e+00  8.09326627e-01
   2.45756551e-01  6.69657073e-01  9.06692225e-01 -9.09806087e-01
   8.95884623e-01  7.69635504e-01  8.85221631e-01  8.89286924e-01
   7.03348558e-01 -2.38231284e-01  9.89345733e-02  2.63832735e-01
   1.29070982e-01  3.22051625e-01]
 [ 7.46906930e-01  6.41725205e-01  8.09326627e-01  1.00123153e+00
   6.67029240e-01  4.61258592e-01  7.90495472e-01 -8.45064567e-01
   7.64769672e-01  5.77501217e-01  7.93778346e-01  7.77097647e-01
   5.51222677e-01 -4.03672885e-01  4.03555670e-02  1.87420711e-01
   4.18869167e-01  5.05314324e-01]
 [ 2.00881439e-01  2.06409699e-01  2.45756551e-01  6.67029240e-01
   1.00123153e+00  1.38431761e-01  2.00217560e-01 -3.02289321e-01
   1.69961019e-01  1.46036511e-01  2.15074904e-01  1.86526180e-01
   1.53697623e-01 -3.25502385e-01 -5.16026240e-02 -2.86185855e-02
   4.06792617e-01  4.20318003e-01]
 [ 4.98273207e-01  5.64854067e-01  6.69657073e-01  4.61258592e-01
   1.38431761e-01  1.00123153e+00  4.98078976e-01 -5.02996017e-01
   4.97845069e-01  6.48642021e-01  4.12068816e-01  4.58456162e-01
   4.04786322e-01 -3.33161873e-01  8.41082601e-02  1.41145578e-01
   5.64852182e-02  3.94934461e-01]
 [ 8.11840645e-01  8.44804611e-01  9.06692225e-01  7.90495472e-01
   2.00217560e-01  4.98078976e-01  1.00123153e+00 -9.73537513e-01
   9.90659730e-01  8.08063766e-01  9.78751548e-01  9.94204811e-01
   7.95893849e-01  2.44702588e-03  6.35490363e-02  2.14445853e-01
  -3.10409338e-03  1.16323654e-01]
 [-7.89531434e-01 -8.16768295e-01 -9.09806087e-01 -8.45064567e-01
  -3.02289321e-01 -5.02996017e-01 -9.73537513e-01  1.00123153e+00
  -9.51112661e-01 -7.70982661e-01 -9.66090990e-01 -9.56973892e-01
  -7.63345981e-01  8.70842667e-02 -4.55135596e-02 -1.84181395e-01
  -1.05393355e-01 -2.11345600e-01]
 [ 8.12866245e-01  8.41196310e-01  8.95884623e-01  7.64769672e-01
   1.69961019e-01  4.97845069e-01  9.90659730e-01 -9.51112661e-01
   1.00123153e+00  8.11346565e-01  9.64981168e-01  9.88989478e-01
   7.93172901e-01  1.77904437e-02  7.28156271e-02  2.16892797e-01
  -2.65026808e-02  9.80719286e-02]
 [ 6.74996601e-01  9.62404205e-01  7.69635504e-01  5.77501217e-01
   1.46036511e-01  6.48642021e-01  8.08063766e-01 -7.70982661e-01
   8.11346565e-01  1.00123153e+00  7.50600479e-01  7.95049173e-01
   8.68007898e-01  5.26495142e-02  1.34795631e-01 -2.44448372e-03
  -1.17812145e-01  6.72596198e-02]
 [ 7.92438680e-01  8.03750964e-01  8.85221631e-01  7.93778346e-01
   2.15074904e-01  4.12068816e-01  9.78751548e-01 -9.66090990e-01
   9.64981168e-01  7.50600479e-01  1.00123153e+00  9.76750881e-01
   7.81984129e-01  1.68621531e-02  3.39888849e-02  2.05971428e-01
   2.28035846e-02  9.60435931e-02]
 [ 8.13494150e-01  8.33508154e-01  8.89286924e-01  7.77097647e-01
   1.86526180e-01  4.58456162e-01  9.94204811e-01 -9.56973892e-01
   9.88989478e-01  7.95049173e-01  9.76750881e-01  1.00123153e+00
   7.90805725e-01  1.62348310e-02  6.49567636e-02  2.03838067e-01
   7.85566308e-05  1.03330899e-01]
 [ 5.78399755e-01  9.26281607e-01  7.03348558e-01  5.51222677e-01
   1.53697623e-01  4.04786322e-01  7.95893849e-01 -7.63345981e-01
   7.93172901e-01  8.68007898e-01  7.81984129e-01  7.90805725e-01
   1.00123153e+00  2.16651698e-01  1.68973862e-01 -5.83635746e-02
  -2.32617810e-01 -1.20727281e-01]
 [-2.53990635e-01  6.67790806e-02 -2.38231284e-01 -4.03672885e-01
  -3.25502385e-01 -3.33161873e-01  2.44702588e-03  8.70842667e-02
   1.77904437e-02  5.26495142e-02  1.68621531e-02  1.62348310e-02
   2.16651698e-01  1.00123153e+00 -5.93373719e-02 -1.31142620e-01
  -8.43627948e-01 -9.18420730e-01]
 [ 2.00887113e-01  1.40563881e-01  9.89345733e-02  4.03555670e-02
  -5.16026240e-02  8.41082601e-02  6.35490363e-02 -4.55135596e-02
   7.28156271e-02  1.34795631e-01  3.39888849e-02  6.49567636e-02
   1.68973862e-01 -5.93373719e-02  1.00123153e+00 -4.53538836e-02
   8.48972195e-02  6.12111362e-02]
 [ 1.61304844e-01 -1.43598307e-02  2.63832735e-01  1.87420711e-01
  -2.86185855e-02  1.41145578e-01  2.14445853e-01 -1.84181395e-01
   2.16892797e-01 -2.44448372e-03  2.05971428e-01  2.03838067e-01
  -5.83635746e-02 -1.31142620e-01 -4.53538836e-02  1.00123153e+00
   7.28908031e-02  2.00156475e-01]
 [ 2.95777412e-01 -1.16976151e-01  1.29070982e-01  4.18869167e-01
   4.06792617e-01  5.64852182e-02 -3.10409338e-03 -1.05393355e-01
  -2.65026808e-02 -1.17812145e-01  2.28035846e-02  7.85566308e-05
  -2.32617810e-01 -8.43627948e-01  8.48972195e-02  7.28908031e-02
   1.00123153e+00  8.91041674e-01]
 [ 3.64608943e-01  3.92302597e-02  3.22051625e-01  5.05314324e-01
   4.20318003e-01  3.94934461e-01  1.16323654e-01 -2.11345600e-01
   9.80719286e-02  6.72596198e-02  9.60435931e-02  1.03330899e-01
  -1.20727281e-01 -9.18420730e-01  6.12111362e-02  2.00156475e-01
   8.91041674e-01  1.00123153e+00]]

Identify eigen values and eigen vector

In [29]:
# Step 2- Get eigen values and eigen vector
eig_vals, eig_vecs = np.linalg.eig(cov_matrix)
print('Eigen Vectors \n%s', eig_vecs)
print('\n Eigen Values \n%s', eig_vals)
Eigen Vectors 
%s [[-2.72251046e-01 -8.97284818e-02  2.26045073e-02  1.30419032e-01
  -1.52324139e-01  2.58374578e-01 -1.88794221e-01 -7.71578238e-01
  -3.61784776e-01 -1.25233628e-01  2.92009470e-02  7.62442008e-04
  -1.06680587e-02  1.05983722e-02 -1.01407495e-01 -1.46326861e-01
  -3.81638532e-03  3.32992130e-03]
 [-2.85370045e-01  1.33173937e-01  2.10809943e-01 -2.06785531e-02
   1.39022591e-01 -6.88979940e-02  3.90871235e-01 -6.60528436e-02
  -4.62957583e-02  2.40262612e-01  7.29503235e-02  1.93799916e-01
  -7.74670931e-03 -8.71766559e-02 -3.11337823e-01  1.96463651e-01
  -2.96230720e-01  5.83996136e-01]
 [-3.01486231e-01 -4.40259591e-02 -7.08780817e-02  1.07425217e-01
   8.07335409e-02 -2.04800896e-02 -1.76384547e-01  2.98693883e-01
  -2.64499195e-01 -9.42971834e-02  7.78755026e-01 -2.32649049e-01
   1.11905744e-02  2.28724292e-02  5.89166755e-02  5.33931974e-02
   9.72735293e-02  8.64160083e-02]
 [-2.72594510e-01 -2.04232234e-01 -4.02139629e-02 -2.52957341e-01
  -1.19012554e-01 -1.39449676e-01 -1.56474448e-01  5.20410402e-02
  -1.70430331e-01  8.97062530e-02 -1.31647081e-01  2.75143903e-01
  -3.74689248e-02  2.90668794e-02 -2.04574984e-01  6.58916577e-01
   2.74900989e-01 -2.71300494e-01]
 [-9.85797647e-02 -2.59136858e-01  1.14805227e-01 -6.05228001e-01
  -8.32128223e-02 -5.87145492e-01 -1.02492950e-01 -1.61872497e-01
   1.17212341e-02  2.87528583e-02  4.97534613e-02 -1.45558629e-01
   2.09842091e-02 -9.40948646e-03  1.50893891e-01 -2.89610835e-01
  -1.19100067e-01  9.64017331e-02]
 [-1.94755787e-01 -9.45756320e-02  1.39313484e-01  3.22531411e-01
   6.21376071e-01 -2.65624695e-01 -3.98851794e-01 -5.85800952e-02
   1.73213170e-01 -2.49937617e-01 -1.98444456e-01  1.72600201e-01
  -1.06888298e-02  1.20980507e-02  1.76055013e-01  6.68511988e-02
  -2.92959443e-02  1.10841470e-01]
 [-3.10518442e-01  7.23350799e-02 -1.12924698e-01 -1.00540370e-02
  -8.12405608e-02  8.93335163e-02 -9.14237336e-02  8.45300921e-02
   1.37499298e-01  1.11244025e-01 -1.61642905e-01 -8.22439493e-02
   8.37148260e-01  2.72442207e-01 -1.51805844e-02 -7.66778803e-02
   5.60355480e-02  8.33248999e-02]
 [ 3.08438338e-01 -1.16876769e-02  9.00330455e-02  7.99117560e-02
   7.47379231e-02 -7.25853857e-02  1.04875746e-01 -2.16815347e-01
  -2.59988735e-01  1.24837047e-01 -4.29365477e-03 -3.50089602e-01
   2.42295907e-01  2.61394487e-03  4.61164909e-01  5.23226723e-01
  -2.65096114e-01 -1.36447171e-02]
 [-3.07548493e-01  8.40915278e-02 -1.11063547e-01  1.60464922e-02
  -7.75020996e-02  9.60554272e-02 -9.06723384e-02  3.37069994e-02
   1.03269951e-01  2.11468012e-01 -2.40841717e-01 -3.42527317e-01
  -9.86931593e-02 -6.84892390e-01  2.18872117e-01  2.39504315e-02
   2.70709305e-01  1.72817545e-01]
 [-2.76301073e-01  1.25836631e-01  2.19877688e-01  6.66507863e-02
   2.46140560e-01 -6.35014904e-02  3.49667685e-01 -2.26684736e-01
   2.44776407e-01  3.87473859e-01  2.24580349e-01  3.05154380e-02
  -1.40549391e-02  4.47385929e-02  1.53765067e-01 -1.04419937e-01
   1.53673085e-01 -5.43122947e-01]
 [-3.02748114e-01  7.01998575e-02 -1.44818765e-01 -6.98045095e-02
  -1.49584067e-01  1.34458896e-01 -7.54753072e-02  1.45772665e-01
   5.85239946e-02 -1.47036092e-01  2.06902072e-02  2.33368955e-01
   1.43866319e-02 -2.54510995e-01  1.79499013e-01  1.16604375e-02
  -7.26163025e-01 -3.24937516e-01]
 [-3.07040626e-01  7.79336637e-02 -1.15323952e-01 -1.73631584e-02
  -1.15117310e-01  1.26968672e-01 -6.99641470e-02  5.32611781e-02
   1.28904560e-01  1.60305310e-01 -1.96322990e-01 -2.75169550e-01
  -4.75672122e-01  6.13103868e-01  2.20362642e-01  7.99305617e-02
  -1.22815848e-01  1.42051799e-01]
 [-2.61520489e-01  2.09927277e-01  2.13627435e-01 -7.22457181e-02
   7.54871674e-03 -7.33961842e-02  4.55851958e-01  1.58194670e-01
  -3.37170589e-01 -5.87690102e-01 -2.58436921e-01 -1.07063554e-01
   8.61256926e-03  4.41891377e-02  1.43753708e-01 -5.21969873e-02
   1.69567965e-01 -8.32177228e-02]
 [ 4.36323635e-02  5.03914450e-01 -6.73920886e-02 -1.35860558e-01
  -1.40527774e-01 -1.31928871e-01 -7.90311042e-02 -3.00374428e-01
   5.01365221e-01 -3.87030017e-01  2.27875444e-01 -1.38958435e-01
   7.55464886e-03 -1.59765660e-02 -1.34656976e-01  3.04769192e-01
   5.39469506e-02  3.01217731e-02]
 [-3.67057041e-02 -1.45682524e-02  5.21623444e-01  4.90121679e-01
  -5.89800103e-01 -3.12415086e-01 -1.30187397e-01  1.14687509e-01
   7.50393829e-02  5.41502565e-02 -1.39861362e-02  5.61401152e-03
  -2.19811008e-03 -5.03222786e-03 -1.37166771e-02 -4.76724453e-03
  -3.27151282e-02 -2.14301813e-02]
 [-5.88504115e-02 -9.33980545e-02 -6.87170643e-01  3.80232477e-01
  -1.27793729e-01 -4.82506903e-01  3.10629290e-01 -1.18168951e-01
  -3.07213623e-02 -1.36044539e-02 -1.77010708e-02  8.59021362e-02
  -1.39575997e-02  1.10992435e-02  2.72433694e-02 -2.97178011e-02
   1.82173722e-02  1.83842486e-02]
 [-3.48373860e-02 -5.01664210e-01  6.22069465e-02 -3.55391597e-02
  -1.81582693e-01  2.75222340e-01  2.59557864e-01 -7.27008273e-02
   3.62122453e-01 -2.20343289e-01  1.73696003e-01  2.79657886e-01
   3.82401827e-02  7.76499049e-03  4.14581122e-01  1.14797284e-01
   1.66961820e-01  2.41026732e-01]
 [-8.28136172e-02 -5.06546563e-01  4.08035393e-02  1.03008417e-01
   1.11256244e-01  6.05771535e-02  1.76348774e-01  1.81034286e-02
   2.40710780e-01 -1.71416688e-01 -7.22825606e-02 -5.36171185e-01
   3.98716359e-03 -4.78049584e-02 -4.65683959e-01  8.53480643e-02
  -1.96223612e-01 -1.78387852e-01]]

 Eigen Values 
%s [9.79297570e+00 3.37710644e+00 1.20873054e+00 1.13659560e+00
 8.96286859e-01 6.58293128e-01 3.23056525e-01 2.26906613e-01
 1.12741686e-01 7.62069059e-02 6.18393099e-02 4.42420969e-02
 3.12610726e-03 1.01216098e-02 2.99919142e-02 2.67735138e-02
 1.77191935e-02 1.94537446e-02]

Find variance and cumulative variance by each eigen vector

In [30]:
tot = sum(eig_vals)
var_exp = [( i /tot ) * 100 for i in sorted(eig_vals, reverse=True)]
cum_var_exp = np.cumsum(var_exp)
print("Cumulative Variance Explained", cum_var_exp)
Cumulative Variance Explained [ 54.33850121  73.07712653  79.78403645  86.09068965  91.0639364
  94.71662207  96.50917296  97.76821471  98.39378701  98.81663795
  99.1597671   99.40525421  99.571671    99.72022979  99.82817322
  99.9264921   99.9826541  100.        ]
In [31]:
plt.plot(var_exp)
Out[31]:
[<matplotlib.lines.Line2D at 0x256571cfd30>]
In [32]:
# Ploting 
plt.figure(figsize=(10 , 5))
plt.bar(range(1, eig_vals.size + 1), var_exp, alpha = 0.5, align = 'center', label = 'Individual explained variance')
plt.step(range(1, eig_vals.size + 1), cum_var_exp, where='mid', label = 'Cumulative explained variance')
plt.ylabel('Explained Variance Ratio')
plt.xlabel('Principal Components')
plt.legend(loc = 'best')
plt.tight_layout()
plt.show()
In [33]:
"""
Visually we can observe that their is steep drop in variance explained with increase in number of PC's.
We will proceed with 7 components here.
""" 
Out[33]:
"\nVisually we can observe that their is steep drop in variance explained with increase in number of PC's.\nWe will proceed with 7 components here.\n"

Using PCA command from sklearn and find Principal Components. Transform data to components formed

In [34]:
# NOTE - we are generating only 7 PCA dimensions (dimensionality reduction from 18 to 7)
from sklearn.decomposition import PCA
pca7 = PCA(n_components=7)
pca7.fit(XScaled)
print(pca7.components_)
print(pca7.explained_variance_ratio_)
Xpca7=pca7.transform(XScaled)
    
[[ 0.27225105  0.28537005  0.30148623  0.27259451  0.09857976  0.19475579
   0.31051844 -0.30843834  0.30754849  0.27630107  0.30274811  0.30704063
   0.26152049 -0.04363236  0.0367057   0.05885041  0.03483739  0.08281362]
 [-0.08972848  0.13317394 -0.04402596 -0.20423223 -0.25913686 -0.09457563
   0.07233508 -0.01168768  0.08409153  0.12583663  0.07019986  0.07793366
   0.20992728  0.50391445 -0.01456825 -0.09339805 -0.50166421 -0.50654656]
 [-0.02260451 -0.21080994  0.07087808  0.04021396 -0.11480523 -0.13931348
   0.1129247  -0.09003305  0.11106355 -0.21987769  0.14481876  0.11532395
  -0.21362744  0.06739209 -0.52162344  0.68717064 -0.06220695 -0.04080354]
 [-0.13041903  0.02067855 -0.10742522  0.25295734  0.605228   -0.32253141
   0.01005404 -0.07991176 -0.01604649 -0.06665079  0.06980451  0.01736316
   0.07224572  0.13586056 -0.49012168 -0.38023248  0.03553916 -0.10300842]
 [ 0.15232414 -0.13902259 -0.08073354  0.11901255  0.08321282 -0.62137607
   0.08124056 -0.07473792  0.0775021  -0.24614056  0.14958407  0.11511731
  -0.00754872  0.14052777  0.5898001   0.12779373  0.18158269 -0.11125624]
 [ 0.25837458 -0.06889799 -0.02048009 -0.13944968 -0.58714549 -0.26562469
   0.08933352 -0.07258539  0.09605543 -0.06350149  0.1344589   0.12696867
  -0.07339618 -0.13192887 -0.31241509 -0.4825069   0.27522234  0.06057715]
 [ 0.18879422 -0.39087124  0.17638455  0.15647445  0.10249295  0.39885179
   0.09142373 -0.10487575  0.09067234 -0.34966769  0.07547531  0.06996415
  -0.45585196  0.0790311   0.1301874  -0.31062929 -0.25955786 -0.17634877]]
[0.54338501 0.18738625 0.0670691  0.06306653 0.04973247 0.03652686
 0.01792551]
In [40]:
"""
We can see these are independent and not corelated.So we have reduced dimensions
and covered almost 95 % of variation or information as rest are noise.
""" 
sns.pairplot(pd.DataFrame(Xpca7))
Out[40]:
<seaborn.axisgrid.PairGrid at 0x1b2174d1a58>

SVM

In [35]:
from sklearn import svm
In [ ]:
 
In [36]:
XScaled.shape
Out[36]:
(813, 18)
In [37]:
y.head()
Out[37]:
class
0 2
1 2
2 1
3 2
5 0
In [38]:
# Split the Data.Considering all variables.
X_train,X_test,y_train,y_test=train_test_split(XScaled,y,test_size=.30, random_state=1)
In [39]:
# Gamma is a measure of influence of a data point.C is complexity of the model,lower C value creates simple  hypersurface.
clf=svm.SVC(gamma=0.025,C=3)
clf
Out[39]:
SVC(C=3, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.025, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
In [40]:
clf.fit(X_train,y_train)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
Out[40]:
SVC(C=3, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.025, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
In [41]:
y_predSVM=clf.predict(X_test)
In [42]:
from sklearn.metrics import accuracy_score
accuracy=accuracy_score(y_test,y_predSVM)*100
print(('Accuracy of SVM model with all variables is equal '+ str(round(accuracy, 2))) + ' %.')
Accuracy of SVM model with all variables is equal 97.54 %.
In [43]:
# MAking confusion matrix
from sklearn.metrics import confusion_matrix
cmSVMAllVar=confusion_matrix(y_test,y_predSVM)
cmSVMAllVar
Out[43]:
array([[ 78,   0,   0],
       [  1, 119,   1],
       [  1,   3,  41]], dtype=int64)

SVM With PCA(7 Var)

In [44]:
Xpca7.shape
Out[44]:
(813, 7)
In [45]:
# Split the data
X_train7Var,X_test7Var,y_train7Var,y_test7Var=train_test_split(Xpca7,y,test_size=.30,random_state=1)
In [46]:
clf7Var=svm.SVC(gamma=0.25,C=3)
clf7Var
Out[46]:
SVC(C=3, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.25, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
In [47]:
clf7Var.fit(X_train7Var,y_train7Var)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
Out[47]:
SVC(C=3, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.25, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
In [48]:
y_PredSVM7Var=clf7Var.predict(X_test7Var)
y_PredSVM7Var
Out[48]:
array([1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1, 2, 0, 0, 0, 0, 0, 0, 1, 1, 2,
       1, 2, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 2, 0, 1, 0, 2, 0, 1, 1, 0, 0, 1, 1, 1, 1,
       2, 1, 1, 2, 1, 0, 1, 2, 1, 2, 1, 0, 0, 2, 2, 0, 2, 1, 0, 1, 2, 1,
       0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 0, 1, 0, 1, 2, 1, 1, 0, 2, 0, 2,
       0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 2, 0, 1, 0, 2, 1, 0,
       0, 1, 1, 1, 1, 2, 0, 2, 2, 0, 1, 1, 2, 0, 2, 0, 1, 1, 1, 1, 1, 1,
       0, 2, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 2, 2, 1, 1, 1, 1, 2, 1, 0, 0,
       2, 0, 1, 0, 2, 1, 1, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 1,
       1, 2, 2, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 0,
       2, 1, 0, 0, 1, 1, 0, 1, 1, 2, 2, 0, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1,
       0, 1], dtype=int8)
In [49]:
from sklearn.metrics import accuracy_score
accuracy7Var=accuracy_score(y_test7Var,y_PredSVM7Var)*100
print(('Accuracy of SVM model with 7 variables is equal '+ str(round(accuracy7Var, 2))) + ' %.')
Accuracy of SVM model with 7 variables is equal 94.67 %.
In [50]:
# MAking confusion matrix
from sklearn.metrics import confusion_matrix
cmSVM7Var=confusion_matrix(y_test7Var,y_PredSVM7Var)
cmSVM7Var
Out[50]:
array([[ 74,   2,   2],
       [  1, 117,   3],
       [  0,   5,  40]], dtype=int64)

Accuracy Comparison of SVM With or Without PCA

In [51]:
print(('Accuracy of SVM model with all variables is equal '+ str(round(accuracy, 2))) + ' %.')
print(('Accuracy of SVM model with 7 variables is equal '+ str(round(accuracy7Var, 2))) + ' %.')
Accuracy of SVM model with all variables is equal 97.54 %.
Accuracy of SVM model with 7 variables is equal 94.67 %.

Naive Bayes

In [52]:
from sklearn.naive_bayes import GaussianNB
classifier=GaussianNB()
In [57]:
# With all variables
classifier.fit(X_train,y_train)

y_predNaiveAllVar=classifier.predict(X_test)
y_predNaiveAllVar

cmNaiveAllVar=confusion_matrix(y_test,y_predNaiveAllVar)
cmNaiveAllVar

accuracyNaiveBayesAllVar=accuracy_score(y_test,y_predNaiveAllVar)*100
print('Accuracy of Naive Bayes for all Variables is equal ' + str(round(accuracy, 2)) + ' %.')
Accuracy of Naive Bayes for all Variables is equal 97.54 %.
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
In [58]:
# With 7 variables
classifier.fit(X_train7Var,y_train7Var)

y_predNaive7Var=classifier.predict(X_test7Var)
y_predNaive7Var

cmNaive7Var=confusion_matrix(y_test7Var,y_predNaive7Var)
cmNaive7Var

accuracyNaiveBayes7Var=accuracy_score(y_test7Var,y_predNaive7Var)*100
print('Accuracy of Naive Bayes for 7 Variables is equal ' + str(round(accuracy, 2)) + ' %.')
Accuracy of Naive Bayes for 7 Variables is equal 97.54 %.
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)

Accuracy of Naive Bayes With or Without PCA

In [59]:
print('Accuracy of Naive Bayes for 7 Variables is equal ' + str(round(accuracy, 2)) + ' %.')
print('Accuracy of Naive Bayes for all Variables is equal ' + str(round(accuracy, 2)) + ' %.')
Accuracy of Naive Bayes for 7 Variables is equal 97.54 %.
Accuracy of Naive Bayes for all Variables is equal 97.54 %.
In [ ]:
 
In [60]:
    # Hyper Parameter
In [61]:
from sklearn.metrics import classification_report
In [63]:
from sklearn.svm import SVC
In [64]:
print(classification_report(y_test, y_predSVM))
              precision    recall  f1-score   support

           0       0.97      1.00      0.99        78
           1       0.98      0.98      0.98       121
           2       0.98      0.91      0.94        45

    accuracy                           0.98       244
   macro avg       0.98      0.96      0.97       244
weighted avg       0.98      0.98      0.98       244

In [65]:
from sklearn.model_selection import GridSearchCV 
  
# defining parameter range 
param_grid = {'C': [0.1, 1, 10, 100, 1000],  
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 
              'kernel': ['rbf']}  
  
grid = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3) 
  
# fitting the model for grid search 
grid.fit(X_train, y_train) 
C:\Anaconda\lib\site-packages\sklearn\model_selection\_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.
  warnings.warn(CV_WARNING, FutureWarning)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.518, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.519, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.519, total=   0.0s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.822, total=   0.0s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.825, total=   0.0s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.810, total=   0.0s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.518, total=   0.0s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.519, total=   0.0s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.519, total=   0.0s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.518, total=   0.0s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.519, total=   0.0s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.519, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.518, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.519, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.519, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.696, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.746, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.704, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.937, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.974, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.974, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.901, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.963, total=   0.0s
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.942, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.518, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.519, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.519, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=rbf ...................................
[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.518, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=rbf ...................................
[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.519, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=rbf ...................................
[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.519, total=   0.0s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........... C=10, gamma=1, kernel=rbf, score=0.754, total=   0.0s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........... C=10, gamma=1, kernel=rbf, score=0.772, total=   0.0s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........... C=10, gamma=1, kernel=rbf, score=0.757, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.974, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.979, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.984, total=   0.0s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.932, total=   0.0s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.968, total=   0.0s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.984, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.901, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.921, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.910, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.518, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.519, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.519, total=   0.0s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] .......... C=100, gamma=1, kernel=rbf, score=0.754, total=   0.0s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] .......... C=100, gamma=1, kernel=rbf, score=0.772, total=   0.0s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] .......... C=100, gamma=1, kernel=rbf, score=0.757, total=   0.0s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.958, total=   0.0s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.979, total=   0.0s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.979, total=   0.0s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.963, total=   0.0s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.952, total=   0.0s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.979, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.927, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.963, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.968, total=   0.0s
[CV] C=100, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.885, total=   0.0s
[CV] C=100, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.910, total=   0.0s
[CV] C=100, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.894, total=   0.0s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.754, total=   0.0s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.772, total=   0.0s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.757, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.958, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.979, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.979, total=   0.0s
[CV] C=1000, gamma=0.01, kernel=rbf ..................................
[CV] ...... C=1000, gamma=0.01, kernel=rbf, score=0.958, total=   0.0s
[CV] C=1000, gamma=0.01, kernel=rbf ..................................
[CV] ...... C=1000, gamma=0.01, kernel=rbf, score=0.942, total=   0.0s
[CV] C=1000, gamma=0.01, kernel=rbf ..................................
[CV] ...... C=1000, gamma=0.01, kernel=rbf, score=0.989, total=   0.0s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.948, total=   0.0s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.937, total=   0.0s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.989, total=   0.0s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.911, total=   0.0s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.942, total=   0.0s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.942, total=   0.0s
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
[Parallel(n_jobs=1)]: Done  75 out of  75 | elapsed:    1.0s finished
C:\Anaconda\lib\site-packages\sklearn\utils\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
Out[65]:
GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                         'kernel': ['rbf']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)
In [66]:
# print best parameter after tuning 
print(grid.best_params_) 
  
# print how our model looks after hyper-parameter tuning 
print(grid.best_estimator_) 
{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}
SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
In [67]:
grid_predictions = grid.predict(X_test) 
  
# print classification report 
print(classification_report(y_test, grid_predictions)) 
              precision    recall  f1-score   support

           0       0.97      1.00      0.99        78
           1       0.98      0.98      0.98       121
           2       0.98      0.93      0.95        45

    accuracy                           0.98       244
   macro avg       0.98      0.97      0.97       244
weighted avg       0.98      0.98      0.98       244

In [ ]: